/********************************************************************************
** 	TITLE: make_exhibits.do
**
**	PROJECT: IGNITE
** 
**  PURPOSE: Make main exhibits for updates
**				
**	NOTES:	Puts the JMS datasets together; they will be split back apart in later stages.
		//Sample cuts are made near the end of the do-file, after gen_appended.dta is saved.
//The only earlier drop is of observations with a negative stay_rec.
********************************************************************************/
* Fix the sort seed so that tie-breaking among equal sort keys is reproducible across runs.
set sortseed 13
		* NOTE(review): burd4 is not a built-in Stata scheme; presumably installed separately - confirm.
		set scheme burd4
		
******************************************************
******************************************************
	*Load Data
		* Start from the merged ROA/JMS/IGNITE file and remove any leftover _merge variable.
		use "$output_data/temp_merged_ROA_JMS_IGNITE_$date.dta", clear
		cap drop _merge

* Clean
*************************************

	***get arresting agency
		* Build an inmate-level lookup of arresting agency in a tempfile, then return to the main data.
		preserve
		
		import delimited "$input_data/jms_all_22dec2022/inmates_bybookdate_arrestloclodging.csv", clear varname(1)
		* Remove the trailing 1 from every variable name that ends in 1.
		ren *1 *
		keep if !mi(arrestingagency)
		* One row per inmate; with force, the remaining rows are discarded even if their agency differs.
		duplicates drop inmate, force
		keep inmate arrestingagency
		tempfile agency
		save `agency'
		restore
		
		* Keep every main-data row whether or not an agency was found.
		merge m:1 inmate using `agency', nogen keep(1 3)

	***merge zipcode
	* Build a currentlocation-level lookup carrying the census variables, then attach it to the main data.
	
	preserve
	
	import excel  "$input_data/census_tract/parsed_all.xlsx", clear firstrow
	* Left-pad countyfp with zeros to three characters.
	replace countyfp="00"+countyfp if length(countyfp)==1
	replace countyfp="0"+countyfp if length(countyfp)==2
	* id is the padded county code followed by the tract; it is the merge key into census_vars.dta.
	gen id = countyfp+tract
	* One row per currentlocation; with force, the remaining rows are discarded even if they differ.
	duplicates drop currentlocation, force
	
	* Only locations whose id is found in census_vars.dta stay in the lookup.
	merge m:1 id using "$output_data/census_vars.dta", nogen keep(3)
	
	tempfile parsed_all
	save `parsed_all'
	restore
	
	* Keep every main-data row; rows without a lookup match get missing values for the lookup variables.
	merge m:1 currentlocation using `parsed_all', nogen keep(1 3)
	
	
		* Second pass for rows still lacking a zipcode: strip blanks from
		* currentlocation and look the zipcode up in zipcode_vars_matched.dta.
		* Rows are set aside in a tempfile and appended back, so no observation is lost.
		tempfile newbatch
		preserve
			drop if !mi(zipcode)
			drop zipcode
			replace currentlocation = subinstr(currentlocation, " ", "", .)
			merge m:1 currentlocation using "$input_data/zipcode_vars_matched.dta", ///
				keepusing(zipcode) keep(master match) nogenerate
			save `newbatch'
		restore
		
		keep if !mi(zipcode)
		append using `newbatch'
		
		* Recode specific zipcodes to 484xx/485xx values.
		* NOTE(review): presumably corrections for mis-geocoded addresses - confirm.
		replace zipcode = "48505" if inlist(zipcode, "72223", "21117", "54968", "27325", "97601", "78744")
		replace zipcode = "48504" if zipcode == "20852"
		replace zipcode = "48506" if zipcode == "63385"
		replace zipcode = "48420" if zipcode == "25005"
		replace zipcode = "48458" if inlist(zipcode, "61054", "14510")
		
		* Natural log of median household income.
		gen log_med_hhincome = ln(med_hhincome)
		
		
		**closure dummy
		* Monthly date values bounding the two windows flagged by D_closed.
		local win1_first = mofd(date("11/10/2020","MDY"))
		local win1_last  = mofd(date("05/10/2021","MDY"))
		local win2_first = mofd(date("03/17/2020","MDY"))
		local win2_last  = mofd(date("06/23/2020","MDY"))
		
		* NOTE(review): the first comparison uses ym_booked and the others use ym.
		* Confirm whether ym is a separate variable or an abbreviation of ym_booked.
		gen D_closed = (ym_booked >= `win1_first' & ym <= `win1_last') | ///
			inrange(ym, `win2_first', `win2_last')
		
		* NOTE(review): stay_day divides stay_orig_misc by 10; confirm the intended units.
		gen stay_day = stay_orig_misc / 10
		
		* Returned within 3 months or sent to prison.
		gen rec_prison = (return_3m == 1 | prison == 1)
		
		* Neither D_Black nor D_White is zero-coded as one: both indicators equal 0.
		gen D_other_nw = (D_Black == 0 & D_White == 0)
		
		* Age-band indicators built from current_age. A missing age yields 0 in every band.
		gen age_17_24  = current_age>=17 & current_age<=24
		gen age_25_34  = current_age>=25 & current_age<=34
		gen age_35_44  = current_age>=35 & current_age<=44
		gen age_45_54  = current_age>=45 & current_age<=54
		gen age_55_64  = current_age>=55 & current_age<=64
		* Stata treats missing as larger than any number, so the open-ended band
		* must exclude missing explicitly or missing ages would be coded as 65+.
		gen age_65plus = current_age>=65 & !mi(current_age)


		* Charge-count categories: 0-4 keep their own value, 5 = 5 to 9,
		* 6 = 10 to 19, 7 = 20 or more. A missing count stays missing.
		gen cat_num_charge = num_c if num_c<=4
		replace cat_num_charge = 5 if num_c>=5 & num_c<10
		replace cat_num_charge = 6 if num_c>=10 & num_c<20
		* Exclude missing here for the same reason as age_65plus.
		replace cat_num_charge = 7 if num_c>=20 & !mi(num_c)
		
		* One 0/1 flag per two-letter case-type code. The code is read from the
		* last two characters of case_id1 through case_id11.
		local case_types EX FD FT FY OD OI OM OT SD SM ST
		foreach code of local case_types {
			gen `code' = 0
		}
		forvalues k = 1/11 {
			gen type`k' = substr(case_id`k', -2, .)
			foreach code of local case_types {
				replace `code' = 1 if type`k' == "`code'"
			}
		}
		
		* Any EX, FD, FT or FY case switches the felony indicator on.
		replace D_felony = 1 if EX + FD + FT + FY > 0
		
		* Blank out the zipcode when total_pop is missing.
		replace zipcode = "" if mi(total_pop)
		
		
		* Assign cat_division by searching the lower-cased arresting agency for
		* keywords. Divisions are applied in ascending order, so when an agency
		* matches keywords from several divisions the highest-numbered one wins.
		* 999 is left on agencies that match nothing.
		cap drop cat_division
		gen cat_division = 999
		
		* Lower-case the agency once; the helper is dropped at the end of this section.
		tempvar agency_lc
		gen `agency_lc' = lower(arrestingagency)
		
		foreach kw in flushing "flint twp" clio montrose thetford vienna {
			replace cat_division = 1 if strpos(`agency_lc', "`kw'")
		}
		foreach kw in davison forest goodrich otisville otter richfield {
			replace cat_division = 2 if strpos(`agency_lc', "`kw'")
		}
		replace cat_division = 3 if strpos(`agency_lc', "burton")
		foreach kw in morris "genesee twp" {
			replace cat_division = 4 if strpos(`agency_lc', "`kw'")
		}
		foreach kw in fenton argentine clay gaines lennon linden creek {
			replace cat_division = 5 if strpos(`agency_lc', "`kw'")
		}
		foreach kw in blanc mundy {
			replace cat_division = 6 if strpos(`agency_lc', "`kw'")
		}
		foreach kw in "flint pd" msp mdoc {
			replace cat_division = 7 if strpos(`agency_lc', "`kw'")
		}
		replace cat_division = 8 if strpos(`agency_lc', "genesee co")
		
		drop `agency_lc'
		
		* Number of observations sharing each zipcode (this also sorts the data by zipcode).
		bys zipcode: gen blockcount=_N
		* D_mi is defined only for bookings after 2015 with a release date; it is 1 when
		* more than 6 observations share the zipcode.
		gen D_mi=blockcount>6 if booking_year>2015&!mi(release_date)
		
		* Row-wise maxima over pairs of case-type flags and health flags.
		egen mis_dui = rowmax(OD  SD)
		egen mis_traffic = rowmax(OT  ST)
		egen mis_crime = rowmax(OM  SM)
		egen D_health = rowmax(D_med D_sui)
		gen num_health = num_med + num_sui
		
		* Share with ebll among those tested: overall (a fraction) and excluding
		* under-18 counts (first in percent, rescaled to a fraction further down).
		gen pctebll = ebll/cnttested
		gen pct18plusebll = (ebll-under18ebll)/(cnttested-under18cnttested) * 100
		* Cap at 100. Missing compares greater than any number in Stata, so it has to be
		* excluded here; otherwise a missing share would be turned into 100 and would
		* never receive the -1 missing sentinel assigned below.
		replace pct18plusebll = 100 if pct18plusebll>100 & !mi(pct18plusebll)
		cap drop young
		gen young = current_age<=24
			
		* Rescale percent variables to fractions.
		foreach v in pc_hs pc_black pct18plusebll pc_clg {
			replace `v' = `v'/100
		}
		
		* Use -1 as the sentinel for a missing tract control.
		foreach v in pctebll pct18plusebll pc_hs log_med_hhincome pc_black pc_clg{
			replace `v' = -1 if mi(`v')
		}
		
		* D_missing flags rows whose smallest listed control equals the -1 sentinel.
		* NOTE(review): pc_clg receives the sentinel above but is not part of this check - confirm.
		egen temp = rowmin(pctebll pct18plusebll pc_hs log_med_hhincome pc_black)
		gen D_missing = temp==-1
		drop temp
		la var D_missing "Missing Census Tract Controls"
		
	***** Updating Recidivism definition [within county recidivism]
		* Sentinel values written to the return_1m ... return_12m indicators:
		*    0  was missing
		*   -9  prison==1
		*  -10  fewer than 30 days per follow-up month between release_date and
		*       10 May 2023 (this overrides the -9)
		* attrit flags the -10 rows that are not prison rows.
		local followup_end = date("05/10/2023","MDY")
		forvalues m = 1/12 {
			replace return_`m'm = 0   if mi(return_`m'm)
			replace return_`m'm = -9  if prison == 1
			replace return_`m'm = -10 if (`followup_end' - release_date) < `m'*30
			gen attrit_`m'm = (return_`m'm == -10 & !prison)
		}
		
		* Carry the "not observed" status over to the 3-month charge outcomes.
		foreach outcome in charged_fel_3m charged_3m num_charged_3m {
			replace `outcome' = -9 if return_3m < 0
		}
		
		* Same for crime costs (sentinel -999) and the return and offence counts
		* (sentinel -9) at each horizon.
		foreach h in 3 6 9 12 {
			replace crime_costs_`h'm = -999 if return_`h'm < 0
			replace num_return_`h'm  = -9   if return_`h'm < 0
			foreach crime in person property po dui drug weapons traffic_other crime_other {
				replace num_`crime'_`h'm = -9 if return_`h'm < 0
			}
		}
			
		* Rescale time_to_maj by 30 (presumably days to 30-day months - confirm).
		replace time_to_maj=time_to_maj/30
		* D_maj_1week ... D_maj_8week: 1 when D_maj==1 and the rescaled time_to_maj
		* is below one to eight quarters of a unit.
		forvalues j=1/8{
			gen D_maj_`j'week = time_to_maj<0.25*`j' & D_maj==1
		}
		
	* major misconduct
		* Count divided by 30 x stay_misc / 7 (presumably stay_misc is in 30-day months,
		* making the denominator weeks - confirm). Zero counts give 0; when stay_misc
		* is 0 the raw count is used in place of the undefined ratio.
		gen num_maj_weekly = num_maj/(30*stay_misc/7) 
		replace num_maj_weekly=0 if num_maj==0
		replace num_maj_weekly=num_maj if stay_misc ==0
	
	* minor misconduct
		* Same construction as for major misconduct, except that a missing count is also set to 0.
		gen num_min_weekly = num_min/(30*stay_misc/7) 
		replace num_min_weekly=0 if num_min==0|mi(num_min)
		replace num_min_weekly=num_min if stay_misc==0
		
		* Set every existing variable whose name starts with num_ and ends with _3m
		* to -9 when return_3m is negative.
		foreach v of varlist num_*_3m {
			replace `v' = -9 if return_3m<0
		}
		* Per-week incident measures by type; a missing result (missing count or zero stay) becomes 0.
		foreach incdt in maj med sui health incident {
			gen D_`incdt'_weekly = num_`incdt'/(stay_misc*30/7)
			replace D_`incdt'_weekly=0 if mi(D_`incdt'_weekly)
		}


	*Dow_scheduled for missing observations
		* 99 stands in for a missing dow_scheduled.
		replace dow_scheduled = 99 if missing(dow_scheduled)
			
		* 1 when the rescaled time_to_maj is below 1; a missing time_to_maj gives 0.
		gen D_maj_fm=time_to_maj<1
		
		* Release-reason indicators (exact, case-sensitive string match).
		gen bonded = release_reason == "Bonded"
		gen tether = release_reason=="Tether"
		
	*Genesee County Jail Identifier
		gen genesee = 1
	
	* Stay length and IGNITE_0 exposure for recidivism, both in units of 30 days
		* Stay from booking to release; rows with a negative stay are removed.
		gen stay_rec = (release_date - booking_date) / 30
		drop if stay_rec < 0
		
		* Exposure is counted from booking when booking is on or after 8 Sep 2020,
		* otherwise from 8 Sep 2020; a negative exposure is set to 0.
		local ignite_start = date("09/08/2020","MDY")
		gen ignite_0_rec = cond(booking_date >= `ignite_start', ///
			release_date - booking_date, ///
			release_date - `ignite_start') / 30
		replace ignite_0_rec = 0 if ignite_0_rec < 0
		
	* Saving resulting dataset to append with Saginaw in B8
		save "$output_data/gen_appended.dta", replace


****** perform all the cuts now
	* Replace release_date with the one stored in the JMS incident file and keep
	* only bookings found in both files.
	drop release_date
	merge 1:1 inmate booking_date using "$output_data/001_merge_JMS_incident_$date.dta", ///
		keep(match) keepusing(release_date) nogenerate
	
	* Keep rows with a release date that are either (Black or White with D_mi==1)
	* or booked in 2015.
	keep if !mi(release_date) & ///
		(((D_Black==1 | D_White==1) & D_mi==1) | booking_year==2015)
	drop if mi(D_remove_any)
	
	* Keep bookings before June 2022.
	* Original annotation on this line: 749 (presumably an observation count - confirm).
	keep if ym_booked < mofd(date("06/01/2022","MDY"))
	
	* drop if no roa
	* Bookings listed in no_roa.xlsx are removed by keeping master-only rows.
	tempfile no_roa
	preserve
		import excel "$input_data/roas_district/no_roa.xlsx", firstrow clear
		save `no_roa'
	restore
	merge 1:1 inmate booking_date using `no_roa', keep(master) nogenerate
		
		* Everything between preserve and restore changes only the copy written to
		* prediction_temp.dta; the in-memory data is put back unchanged afterwards.
		preserve
	* Predict recidivism using 2015 sample
		* The estimation sample below is booking_year<=2015 with a non-negative outcome,
		* which leaves out the negative sentinel codes.
		local v return_3m

		//Predicting using our design and auxiliary controls 
		cap drop return_3m_hat 
		logit `v' $design_control $auxiliary_control ///
			if `v'>=0& booking_year<=2015, cluster(inmate)
		* predict is not restricted to the estimation sample, so fitted probabilities
		* are produced for every row with non-missing covariates.
		predict `v'_hat, pr

		su `v'_hat, de

		* Same specification with num_maj_weekly as the outcome.
		* NOTE(review): num_maj_weekly is a rate, not a 0/1 indicator; logit treats any
		* nonzero, nonmissing value as a positive outcome - confirm this is intended.
		local v num_maj_weekly

		cap drop num_maj_weekly_hat 
		logit `v' $design_control $auxiliary_control ///
			if `v'>=0& booking_year<=2015, cluster(inmate)
		predict `v'_hat, pr

		* Flags bookings after 2015.
		gen e_sample = (booking_year>2015)

	//Producing the subsequent file as a temp as it will be updated later. 
		//File meant to identify relevant observations in study. 
		//recidivism will be updated later with Saginaw County. 
		save "$output_data/prediction_temp.dta", replace
		restore
		